MonolayerCultures / src / Oligodendroglia / [RC17]DE_Analysis_of_Oligodendroglia.Rmd
[RC17]DE_Analysis_of_Oligodendroglia.Rmd
Raw
---
title: '[RC17]DE Analysis of Oligodendroglia'
author: "Nina-Lydia Kazakou"
date: "15/03/2022"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Set-up

### Load libraries

```{r message=FALSE, warning=FALSE}
library(SingleCellExperiment)
library(Seurat)
library(tidyverse)
library(Matrix)
library(scales)
library(cowplot)
library(RCurl)
library(scDblFinder)
library(Matrix)
library(ggplot2)
library(edgeR)
library(dplyr)
library(ggsci)  
library(here)
library(gtools)
library(viridis)
```

### Colour Palette

```{r load_palette}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3<-pal_lancet("lanonc", alpha = 0.7)(9)
mypal4<-pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)
mycoloursP<- c(mypal, mypal2, mypal3, mypal4)
```

### Load object

```{r}
oligos <- readRDS(here("Data", "Processed", "Oligodendroglia", "hES-derived_monolayerOL.rds"))
```

```{r}
DimPlot(oligos, group.by = "ClusterID", pt.size = 0.9, label = TRUE, cols = mycoloursP[6:40]) & NoLegend()
```


# Differential Gene Expression Analysis 

I will perform differential gene expression testing within Seurat. Seurat's default is a Wilcoxon rank sum test. However, based on previous assessments of the differential gene expression methods within our lab, as well as literature (ref), we decided that the "MAST" method fits data better. 
MAST is a GLM-framework that treates cellular detection rate as a covariate that can also work within Seurat, but the MAST package should be loaded. 
MAST uses a two-part, generalized linear model for bimodal data that parameterizes both strongly non-zero or non-detectable features; the fraction of genes expressed in a cell, should be adjusted for as a source of nuisance variation. 
More details about MAST can be found here: https://genomebiology.biomedcentral.com/articles/10.1186/s13059-015-0844-5

```{r}
dir.create(here("outs", "Processed", "Oligodendroglia", "DE_Analysis"))
dir.create(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "All_Markers"))
```

## All Markers 
```{r All_Genes, eval=FALSE}
Idents(oligos) <- "ClusterID"

oligos_markers <- FindAllMarkers(oligos, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, term.use = "MAST")

write.csv(oligos_markers, here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "All_Markers", "Oligos_All_Markers_ClusterID.csv"))
```

```{r}
oligos_markers <- read.csv(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "All_Markers", "Oligos_All_Markers_ClusterID.csv"))
```


```{r Plot_top10_All_Genes}
oligos_markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_log2FC)
top10_oligos_markers <- oligos_markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
```

```{r Cluster_Averages}
cluster_averages <- AverageExpression(oligos, group.by = "ClusterID",
                                      return.seurat = TRUE)

cluster_averages@meta.data$cluster <- factor(levels(oligos@meta.data$ClusterID))

saveRDS(cluster_averages, here("data", "Processed", "Oligodendroglia", "OL_AvgClu.rds"))
```

```{r eval=FALSE}
dir.create(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Plots"))
```


```{r fig.height=15, fig.width=15, message=FALSE, warning=FALSE}
DoHeatmap(object = cluster_averages, features = top10_oligos_markers$gene, label = TRUE, group.by = "cluster",  group.colors = mycoloursP[6:40], draw.lines = F, size = 3) + NoLegend() + theme(text = element_text(size = 10, face = "bold")) + scale_fill_viridis(discrete=FALSE) 
```

```{r eval=FALSE}
plot1 <- DoHeatmap(object = cluster_averages, features = top10_oligos_markers$gene, label = TRUE, group.by = "cluster",  group.colors = mycoloursP[6:40], draw.lines = F, size = 2.5) + NoLegend() + theme(text = element_text(size = 10, face = "bold")) + scale_fill_viridis(discrete=FALSE) 

pdf(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Plots", "Plot_top10_All_Genes.pdf"), paper="a4", width=10, height=16)
print(plot1)
dev.off
```


```{r Filter_Genes}
oligos_markers <- read.csv(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "All_Markers", "Oligos_All_Markers_ClusterID.csv"))

fil_pct_1 = 0.25
fil_pct_2 = 0.1

oligos_fil_markers <- subset(oligos_markers, oligos_markers$pct.1 > fil_pct_1 & oligos_markers$pct.2 < fil_pct_2 )

write.csv(oligos_fil_markers, here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "All_Markers", "Oligos_Fil_Markers_ClusterID.csv"))
```

```{r Plot_top10_Filter_Genes}
oligos_fil_markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_log2FC)
top10_oligos_fil_markers <- oligos_fil_markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
```


```{r fig.height=15, fig.width=15, message=FALSE, warning=FALSE}
DoHeatmap(object = cluster_averages, features = top10_oligos_fil_markers$gene, label = TRUE, group.by = "cluster",  group.colors = mycoloursP[6:40], draw.lines = F, size = 3) + NoLegend() + theme(text = element_text(size = 10, face = "bold")) + scale_fill_viridis(discrete=FALSE) 
```

```{r eval=FALSE}
plot2 <- DoHeatmap(object = cluster_averages, features = top10_oligos_fil_markers$gene, label = TRUE, group.by = "cluster",  group.colors = mycoloursP[6:40], draw.lines = F, size = 3) + NoLegend() + theme(text = element_text(size = 10, face = "bold")) + scale_fill_viridis(discrete=FALSE) 

pdf(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Plots", "Plot_top10_fil_Genes.pdf"), paper="a4", width=10, height=16)
print(plot2)
dev.off
```

## Pairwise Marker Comparisons
```{r Pairwise_All_Cells, eval=FALSE}
clust_id_list <- list(list("CyP_1","Cyp_2"), list("Cyp_2", "CyP_1"), 
                      list("OAPC", "SPARCL1+ OLIGO_3"), list("SPARCL1+ OLIGO_3", "OAPC"),
                      list("OLIGO_1", "OLIGO_2"), list("OLIGO_2", "OLIGO_1"), 
                      list("OLIGO_1", "COP"), list("COP", "OLIGO_1"),
                      list("pri-OPC", "OPC_1"), list("OPC_1", "pri-OPC"), 
                      list("COP", "OPC_2"), list("OPC_2", "COP"),
                      list("COP", "OPC_1"), list("OPC_1", "COP"),
                      list("OPC_1", "OPC_2"), list("OPC_2", "OPC_1"),
                      list("OPC_1", "OPC_3"), list("OPC_3", "OPC_1"),
                      list("OPC_3", "OPC_2"), list("OPC_2", "OPC_3"))

pairwise_mark <- function(seur_obj, 
                             int_cols,
                             clust_id_list,
                             only_pos = TRUE,
                             min_pct = 0.25,
                             logfc_threshold = 0.25,
                             fil_pct_1 = 0.25,
                             fil_pct_2 = 0.1,
                             save_dir = getwd(),
                             test_use = "MAST"){
  for(k in 1:length(int_cols)){
    Idents(seur_obj) <- int_cols[k]
    for(i in 1: length(clust_id_list)){
      clust_mark <- FindMarkers(seur_obj, 
                                ident.1 = clust_id_list[[i]][[1]], 
                                ident.2 = clust_id_list[[i]][[2]],
                                min.pct = min_pct, 
                                test.use = test_use)
      clust_mark$clust <- clust_id_list[[i]][[1]]
      clust_mark$comp_to_clust <- clust_id_list[[i]][[2]]
      write.csv(clust_mark, 
                paste(save_dir,  
                      int_cols[k],
                      "_",
                      clust_id_list[[i]][[1]],
                      "_",
                      clust_id_list[[i]][[2]],
                      ".csv", sep = "" ))
    }
  }
}

dir.create(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Pairwise_Comaparisons"))

pairwise_mark(oligos, 
              int_cols = "ClusterID",
              save_dir = here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Pairwise_Comaparisons"),
              clust_id_list = clust_id_list)
```


```{r Pairwise_Filter_Genes}
gen_mark_list <-function(file_dir = getwd(),
                         avg_log = 1.2,
                         pct_1 = 0.25,
                         pct_2 = 0.5,
                         pairwise = FALSE
){
  temp = list.files(path = file_dir,
                    pattern="*.csv")
  myfiles = lapply(paste(file_dir, temp, sep = "/"), read.csv)
  
  for(i in 1:length(myfiles)){
    dat <- myfiles[[i]]
    av_log_fil <- subset(dat, dat$avg_log2FC > avg_log & 
                           dat$pct.1 > pct_1 & 
                           dat$pct.2 < pct_2)
    if(pairwise == TRUE){
      
      top10 <- av_log_fil %>% top_n(10, avg_log2FC)
      top10$gene <- top10$X
    }else{
      av_log_fil$cluster <- as.character(av_log_fil$cluster)
      top10 <- av_log_fil %>% group_by(cluster) %>% 
        top_n(10, avg_log2FC)
    }
    
    if(i ==1){
      fil_genes <- top10
    }else{
      fil_genes <- rbind(fil_genes, top10)
    }
    
    fil_genes <- fil_genes[!duplicated(fil_genes$gene),]
    
  }
  
  return(fil_genes)
}
oligos_fil_pw <- gen_mark_list(file_dir = here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Pairwise_Comaparisons"),
                              pairwise = TRUE)

oligos_fil_markers <- read.csv(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "All_Markers", "Oligos_Fil_Markers_ClusterID.csv"))

int_genes <- c(oligos_fil_markers$gene, oligos_fil_pw$gene)
int_genes <- unique(int_genes)

length(int_genes)
```


```{r Plot_top100_int_genes, fig.height=100, fig.width=15, message=FALSE, warning=FALSE}
DoHeatmap(object = cluster_averages, features = int_genes, label = TRUE, group.by = "cluster",  group.colors = mycoloursP[6:40], draw.lines = F, size = 3) + NoLegend() + theme(text = element_text(size = 10, face = "bold")) + scale_fill_viridis(discrete=FALSE) 
```

```{r eval=FALSE}
plot3 <- DoHeatmap(object = cluster_averages, features = int_genes, label = TRUE, group.by = "cluster",  group.colors = mycoloursP[6:40], draw.lines = F, size = 3) + NoLegend() + theme(text = element_text(size = 10, face = "bold")) + scale_fill_viridis(discrete=FALSE) 

pdf(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Plots", "Intersction_Genes_Fil_PW.pdf"), paper="a4", width=10, height=16)
print(plot3)
dev.off
```

#### Save VlnPlots & FeaturePlots from all int_genes to identify potentially interesting ones:

```{r VlnPlots, eval=FALSE}
dir.create(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Int_Genes_VlnPlots"))

save_vln_plots <- function(seur_obj,
                           marker_list,
                           save_dir = getwd(),
                           numb_genes = 10,
                           plotheight = 20,
                           plotwidth = 8,
                           group_by = Idents(seur_obj),
                           pt_size = 0.1,
                           n_col = 1){
  gene_groups <- split(marker_list, 
                       ceiling(seq_along(marker_list) / numb_genes))
  
  for(i in 1:length(gene_groups)){
    
    pdf(paste(save_dir, "/clust_marker_vln_", i, ".pdf", sep=""), 
        height=plotheight, width = plotwidth)
    finalPlot<-VlnPlot(seur_obj, features = unlist(gene_groups[i]),
                       group.by = group_by, pt.size= pt_size, ncol=1)
    print(finalPlot)
    graphics.off()
    
  }
}


save_vln_plots(seur_obj= oligos, 
               marker_list = int_genes,
               save_dir = here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Int_Genes_VlnPlots"),
               group_by = "ClusterID", 
               pt_size = 0.1,
               plotheight = 30)

```

```{r FeautrePlots, eval=FALSE}
dir.create(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Int_Genes_FeaturePlots"))

save_feat_plots <- function(seur_obj,
                            marker_list,
                            save_dir = getwd(),
                            numb_genes = 16,
                            plotheight = 18,
                            plotwidth = 20,
                            split_by = "NULL",
                            n_col = 4){
  gene_groups <- split(marker_list, 
                       ceiling(seq_along(marker_list) / numb_genes))
  
  for(i in 1:length(gene_groups)){
    if(split_by != "NULL"){
      feature_plot<-FeaturePlot(seur_obj, 
                                features = unlist(gene_groups[i]),
                                ncol = n_col, split.by = split_by)
    }else{
      feature_plot<-FeaturePlot(seur_obj, 
                                features = unlist(gene_groups[i]),
                                ncol = n_col)
    }
    
    pdf(paste(save_dir, "/clust_marker_feat_", i, ".pdf", sep=""), 
        height=plotheight, width = plotwidth)
    
    print(feature_plot)
    
    graphics.off()
  }
}

save_feat_plots(seur_obj= oligos, 
                marker_list = int_genes,
                save_dir = here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Int_Genes_VlnPlots"))

```



### Some more plots:
```{r}
annotation_genes <- c("OLIG2", "SOX10", "PDGFRA", "PCDH15", "MBP", "MOG", "HOPX", "SPARCL1", "EGFR", "DLL1", "NKX2-2", "MYC", "MKI67", "TOP2A")
```

```{r fig.height=8, fig.width=12, message=FALSE, warning=FALSE}
DoHeatmap(object = cluster_averages, features = annotation_genes, label = TRUE, group.by = "cluster",  group.colors = mycoloursP[6:40], draw.lines = F, size = 3) + NoLegend() + theme(text = element_text(size = 10, face = "bold")) + scale_fill_viridis(discrete=FALSE) 
```

```{r eval=FALSE}
plot4 <-DoHeatmap(object = cluster_averages, features = annotation_genes, label = TRUE, group.by = "cluster",  group.colors = mycoloursP[6:40], draw.lines = F, size = 3) + NoLegend() + theme(text = element_text(size = 10, face = "bold")) + scale_fill_viridis(discrete=FALSE) 

pdf(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Plots", "Annotation_Genes.pdf"), paper="a4", width=10, height=16)
print(plot4)
dev.off
```

```{r fig.width=9, fig.height= 5}
DotPlot(oligos, features = annotation_genes, cols = c("blue", "red")) + FontSize(7) 
```

```{r eval=FALSE}
plot5 <- DotPlot(oligos, features = annotation_genes, cols = c("blue", "red")) + FontSize(5) 

pdf(here("outs", "Processed", "Oligodendroglia", "DE_Analysis", "Plots", "Annotation_Genes_DotPlot.pdf"), paper="a4", width=10, height=4)
print(plot5)
dev.off
```

```{r}
sessionInfo()
```